// Start from an empty session and fix the random-number seed (global "seed").
clear all
set seed ${seed}

// Scratch datasets used further down: peer, head and state.
// tempsave is reserved as well but is not referenced in this section.
tempfile tempsave peer head state

// -------------------------------------------------------
// Peer variables from the student-level in-school file
// -------------------------------------------------------
// Load the in-school data and turn the school code into a numeric id.
use using "${temp}inschool", clear
rename sschlcde scid
destring scid, replace
// Rows without a respondent id get their row number as a placeholder id
// (presumably so that aid can serve as the key of the 1:1 merge below).
replace aid = string(_n) if missing(aid)

/* State: first two characters of w5group1, read from the w5group file */
preserve
	use using "${temp}w5group", clear
	generate w1state = substr(w5group1, 1, 2)
	destring w1state, replace
	keep aid w1state
	save "`state'", replace
restore
merge 1:1 aid using "`state'"
// Give every school its most frequent state code (largest value on ties).
bysort scid: ereplace w1state = mode(w1state), maxmode

	/* Grade: codes 13 and 99 become missing; grades 6-8 are pooled into category 1 */
generate grade = s3 if !inlist(s3, 13, 99)
recode grade (6 7 8 = 1)
tabulate grade, missing

	/* Sex (code 9 becomes missing) */
generate sex = s2 if s2 != 9

	/* (Non)-White */
generate white = s6a

	/* Migration background (code 9 becomes missing) */
generate mig = s8 if s8 != 9

	/* Household size (codes 7 and 99 become missing) */
generate hhsize = s27 if !inlist(s27, 7, 99)

	/*Parental Education: questionnaire categories mapped to approximate years of schooling*/
gen mum_educ=s12 if s12<90
gen pop_educ=s18 if s18<90

foreach var in mum_educ pop_educ{

	// All rules are given in ONE recode call so that each rule is applied to the
	// original category code. Issuing them as separate, sequential recode
	// commands chained 1 -> 8 -> 18, i.e. "eighth grade or less" ended up
	// with 18 years of schooling.
	recode `var'		///
		(10   = 0)		/// "She never went to school."
		(1    = 8)		/// "eighth grade or less"
		(2    = 10)		/// "more than eighth grade, but did not graduate from high school"
		(3 4  = 12)		/// "high school graduate" / "completed a GED"
		(5    = 13)		/// "went to a business, trade, or vocational school after high school"
		(6    = 14)		/// "went to college but did not graduate"
		(7    = 16)		/// "graduated from college or a university"
		(8    = 18)		/// "professional training beyond a four-year college"
		(9 11 = 12)		//  level unknown / unknown whether attended school: set to 12
}

	/* (Worse than) single-parent household, built from the indicators s11 and s17 */
	/* Both indicators equal 1: not a single-parent household */
generate single = 0 if s11==1 & s17==1
	/* Exactly one of the two indicators equals 1 */
replace single = 1 if (s11==1) != (s17==1)
	/* Both indicators equal 0: less than one parent */
replace single = 1 if s11==0 & s17==0
	/* One indicator is 0 and the other is missing: at most one parent */
replace single = 1 if (s11==. | s17==.) & (s11==0 | s17==0)
	/* Code 9 (multiple responses) on either item is counted as single */
	/* NOTE(review): confirm that multiple responses should be coded 1 rather than missing */
replace single = 1 if s11==9 | s17==9
tabulate single, missing

	/* Course grades: the item is sign-flipped and kept only when that same item
	   holds a valid code (1-4); it is then standardized within state x grade
	   cells using kidwgt as analytic weight.
	   The validity filter must test the variable that is actually used: the
	   math and English filters were previously swapped (s10a vs s10b), letting
	   invalid codes of the used item through. */

	/* Grade Math */
gen grade_mat=-s10b if inlist(s10b,1,2,3,4)
qui: gstats transform (standardize)	grade_mat [aw=kidwgt], by(w1state grade) replace

	/* Grade English */
gen grade_eng=-s10a if inlist(s10a,1,2,3,4)
qui: gstats transform (standardize)	grade_eng [aw=kidwgt], by(w1state grade) replace

	/* Grade Science */
gen grade_sci=-s10d if inlist(s10d,1,2,3,4)
qui: gstats transform (standardize)	grade_sci [aw=kidwgt], by(w1state grade) replace

	/* Expectation dummies (likelihood > 50:50): 1 if the item exceeds 4, 0 otherwise,
	   missing when the item is missing or coded 99.
	     live35   <- s45a   live to age 35
	     kill21   <- s45c   killed by age 21
	     hiv      <- s45d   HIV
	     college  <- s45e   college graduate
	     middle30 <- s45f   middle class */
local dummies live35 kill21 hiv college middle30
local items   s45a   s45c   s45d s45e   s45f

forvalues i = 1/5 {
	local out : word `i' of `dummies'
	local src : word `i' of `items'
	generate `out' = (`src' > 4) if !missing(`src') & `src' != 99
	tabulate `out', missing
}

	/* Trouble-at-school dummies (>= almost every day): 1 if the item is 3 or 4,
	   0 otherwise, missing when the item is missing or coded 9.
	     troubteach <- s46a   trouble with teachers
	     troubatt   <- s46b   trouble paying attention
	     troubhw    <- s46c   trouble doing homework
	     troubstud  <- s46d   trouble with other students */
local dummies troubteach troubatt troubhw troubstud
local items   s46a       s46b     s46c    s46d

forvalues i = 1/4 {
	local out : word `i' of `dummies'
	local src : word `i' of `items'
	generate `out' = inlist(`src', 3, 4) if !missing(`src') & `src' != 9
	tabulate `out', missing
}

	/* Club participation: s44 with codes 0 and 1 swapped; any other value is carried over unchanged */
generate club = cond(s44 == 0, 1, cond(s44 == 1, 0, s44))

	/* Try hard == very best: 1 if s48 equals 1, 0 otherwise; missing if s48 is missing or 9 */
generate tryhard = (s48 == 1) if !missing(s48) & s48 != 9
tabulate tryhard, missing

	/* Skipped school in the last 12 months: 0 if s59g is 0, 1 if s59g is above 0 and at most 6 */
generate skip = (s59g > 0) if inrange(s59g, 0, 6)
tabulate skip, missing


*CALCULATE SCHOOL AVERAGES
	*Restrict to HS students
// Only rows with grade 9, 10, 11 or 12 remain; all averages below are over these students.
keep if inlist(grade,9,10,11,12)

// Student characteristics for which school-level peer averages are built.
local vars 										///
sex white mig mum_educ pop_educ single hhsize 	/// socio-demographics
grade_mat grade_eng grade_sci 					/// grades
live35 kill21 hiv college middle30 				/// expectations
troubteach troubatt troubhw troubstud 			/// in school problems
club tryhard skip								// in school behaviours

// Prefix every characteristic with peer_ so the wildcard peer_* can be used below.
foreach x of local vars{
	rename `x' peer_`x'
}

	*Leave-grade out averages (-> all students for those not in HS yet)
// grade1_ and tgrade1_ : kidwgt-weighted school mean over all retained students.
// The merge option adds the result to the existing rows instead of replacing the data.
gcollapse grade1_peer_*=peer_* 	[aw=kidwgt], by(scid) merge wildparse
gcollapse tgrade1_peer_*=peer_* [aw=kidwgt], by(scid) merge wildparse
// grade9_ ... grade12_ : school mean over students who are NOT in grade g.
forvalues g=9/12{
	gcollapse grade`g'_peer_*=peer_* if grade!=`g' 	 [aw=kidwgt], by(scid) merge wildparse
}

// tgrade9_ ... tgrade11_ : school mean over students in grades ABOVE g.
// (Missing grades cannot slip through the > comparison because of the keep above.)
forvalues g=9/11{
	gcollapse tgrade`g'_peer_*=peer_* if grade>`g' 	 [aw=kidwgt], by(scid) merge wildparse
}

	*Collapse to school level
// NOTE(review): the wildcard grade* also matches the variable grade itself, so the
// school mean of grade is kept under the name grade in the saved file - confirm
// that this cannot be mistaken for a student's own grade after later merges.
gcollapse grade* tgrade*, by(scid)

*RESTRICT, LABEL AND SAVE
// One row per school, stored in the peer tempfile.
save  "`peer'", replace

// -----------------------------------------
// School variables headmaster questionnaire
// -----------------------------------------

*PULL DATA
// Variable ranges of the school administrator file, grouped by topic.
local punish    a31a-a31x
local track     a23a-a23g
local retent    a19a-a19g
local teachexp  a11-a13
local teachcomp a8a-a10
local classsize a6-a7

use aschlcde atype `punish' `teachexp' `teachcomp' `classsize' `track' `retent' using "${temp}schadm1", clear
rename aschlcde scid

*CODE VARS
	/* Penalty policies: codes 97 and 99 become missing, codes 1-7 are kept as they are */
foreach item of varlist `punish' {
	recode `item' (97 99 = .)
	tabulate `item', missing
}

// Give the penalty items that are used later a descriptive name.
rename (a31a  a31g    a31m     a31i     a31o    a31q  a31c  a31e   a31k     a31s      a31u     a31w)  ///
       (cheat alcposs alcdrink drugposs druguse smoke fight injure weapposs verbteach injteach steal)

	/* Tracking: 1 if any tracking item equals 1, 0 otherwise, missing if no item was answered */
foreach item of varlist `track' {
	recode `item' (7 = .)
	tabulate `item', missing
}
egen track = anymatch(`track'), values(1)
egen test = rownonmiss(`track')
replace track = . if !test
drop test
tabulate track, missing

	/* Retention: row mean of the retention items after setting code 997 to missing */
foreach item of varlist `retent' {
	recode `item' (997 = .)
	tabulate `item', missing
}
egen retent = rowmean(`retent')
tabulate retent, missing

	/* Concentration of the teacher body by race, Hispanic origin and gender
	   (Herfindahl index: sum of squared shares, shares given in percent) */
foreach item of varlist `teachcomp' {
	tabulate `item', missing
}

generate hi_race = (a8a/100)^2+(a8b/100)^2+(a8c/100)^2+(a8d/100)^2+(a8e/100)^2
summarize hi_race

generate hi_hisp = (a9/100)^2+((100-a9)/100)^2
summarize hi_hisp

generate hi_gender = (a10/100)^2+((100-a10)/100)^2
summarize hi_gender

	/* Teacher shares carried over unchanged from a8a, a9 and a10 */
generate teach_white  = a8a
generate teach_hisp   = a9
generate teach_female = a10

	/*Teacher Experience*/
foreach x of varlist `teachexp'{
	replace `x'=. if `x'==997
	tab `x', m
}
// A missing a11 is set to 0 when a12 was reported.
replace a11=0 if a12!=. & a11==.
// Cap a12 so that a11 + a12 never exceeds 100. Both values must be observed:
// Stata treats missing as larger than any number, so without the guard the
// condition (a12+a11)>100 is also true when a12 is missing, and a12 would be
// silently imputed as 100-a11.
replace a12=100-a11 if (a12+a11)>100 & !missing(a11, a12)

rename a11 teach_5years
rename a12 teach_new
rename a13 teach_master

	/*Class Size*/
foreach x of varlist `classsize'{
	replace `x'=. if `x'==997
	tab `x', m
}
rename a6 teach_no
rename a7 class

	/* Private school indicator: 1 when atype equals 2, 0 for every other value */
	/* NOTE(review): a missing atype is coded 0 as well - confirm this is intended */
generate private = (atype == 2)

*RESTRICT, LABEL AND SAVE
// School-level variables written to the head tempfile.
local keepvars														///
	teach_no teach_white teach_hisp teach_female					/// teacher body
	class teach_5years teach_new teach_master						/// class size and experience
	hi_race hi_hisp hi_gender										/// concentration indices
	track retent													/// tracking and retention
	cheat fight injure alcposs drugposs weapposs					/// penalty items
	alcdrink druguse smoke verbteach injteach steal					/// penalty items (cont.)
	private

destring scid, replace
keep scid `keepvars'
save "`head'", replace

// -----------------------------------------
// Merge school information
// -----------------------------------------
// Start from the administrator data; add peer averages and the HS file.
// Schools that appear only in the using files are dropped.
use "`head'", clear
merge 1:1 scid using "`peer'", keep(master match) nogenerate
merge 1:1 scid using "${temp}HS.dta", keep(master match) nogenerate
generate HsId = scid
save "${temp}schoolinfo.dta", replace

// -----------------------------------------
// Merge school information to high school assignments
// -----------------------------------------
// Every row of the assignment file keeps its place; school info is attached via HsId.
use "${temp}HsId.dta", clear
merge m:1 HsId using "${temp}schoolinfo.dta", keep(master match) nogenerate

// -----------------------------------------
// Calculate peer values
// -----------------------------------------
local peervars									///
sex white mig mum_educ pop_educ single hhsize 	/// socio-demographics
grade_mat grade_eng grade_sci 					/// grades
live35 kill21 hiv college middle30 				/// expectations
troubteach troubatt troubhw troubstud 			/// in school problems
club tryhard skip								//  in school behaviours

// For each characteristic, pick the school average that belongs to the row's
// own grade, then remove the grade-specific source columns.
foreach v of local peervars {
	generate peer_`v' = .
	generate tpeer_`v' = .

	// peer_ : source columns exist for grade codes 1 and 9-12
	foreach g of numlist 1 9/12 {
		replace peer_`v' = grade`g'_peer_`v' if grade == `g'
		drop grade`g'_peer_`v'
	}

	// tpeer_ : source columns exist for grade codes 1 and 9-11 only
	foreach g of numlist 1 9/11 {
		replace tpeer_`v' = tgrade`g'_peer_`v' if grade == `g'
		drop tgrade`g'_peer_`v'
	}
}

// ------------------------------------------
// Calculate sanction factors (Anderson)
// ------------------------------------------
local rules cheat fight injure alcposs drugposs weapposs alcdrink druguse smoke verbteach injteach steal

// Standardize every penalty item into a new variable with suffix _st.
foreach x in `rules'{
	gstats transform (standardize)	`x'_st=`x'
}
// Sub-indices: sums of standardized items (missing if any component is missing).
gen f_soc=fight_st+injure_st+weapposs_st+verbteach_st+injteach_st+steal_st
gen f_acad=cheat_st
gen f_drug=alcposs_st+drugposs_st+alcdrink_st+druguse_st+smoke_st
// Overall strictness: row sum of the raw penalty items. The missing option makes
// the sum missing when ALL items are missing; plain rowtotal() returns 0 for such
// rows, which would hand schools without any penalty data a real (lowest)
// strictness value and distort the standardization below.
// NOTE(review): partially missing rows still count the missing items as 0.
egen f_stri=rowtotal(`rules'), missing

qui: gstats transform (standardize)	f_soc f_acad f_drug f_stri, replace

// ------------------------------------------
// Calculate quality factors (PCA): 1 factor (w/o str_consol)
// ------------------------------------------
// The four school quality inputs shared by both indices below.
local quality class teach_5years teach_new teach_master

pca `quality', components(1)
rotate, blanks(0.30) oblimin(0) oblique normalize
estat rotatecompare
// Score of the single retained component, stored as f_over1.
predict f_teach*
generate f_over1 = f_teach1

correlate f_over1 `quality'
drop f_teach*

// ------------------------------------------
// Calculate quality factors (Anderson 2008)
// ------------------------------------------
foreach q of local quality {
	gstats transform (standardize)	`q'_st=`q'
}
// Signed sum of standardized inputs: experience and master shares enter
// positively, share of new teachers and class size negatively.
generate f_over2 = teach_5years_st+teach_master_st-teach_new_st-class_st
quietly: gstats transform (standardize)	f_over2, replace

correlate f_over2 `quality'

// ---------------------------------------------
// Calculate quality factors including all (PCA)
// ---------------------------------------------
// Four specifications that extend the core quality inputs; each one yields a
// first-component score f_extend1 ... f_extend4.
local core  class teach_5years teach_new teach_master
local spec1 `core' retent track f_stri
local spec2 `core' private
local spec3 `core' teach_white teach_female
local spec4 `core' retent track f_stri private teach_white teach_female

// Only the score of the first specification is sign-flipped.
local flip1 "-"

forvalues k = 1/4 {
	pca `spec`k'', components(1)
	rotate, blanks(0.30) oblimin(0) oblique normalize
	estat rotatecompare
	predict f
	generate f_extend`k' = `flip`k''f
	drop f
}

correlate f_extend* `spec4'

// ------------------------------------------
// Save
// ------------------------------------------
// Final variable set: identifiers, school inputs, penalty items, peer
// averages, all f_ indices and the HsImp1-HsImp4 columns.
local keepvars														///
	class teach_5years teach_new teach_master						/// quality inputs
	cheat fight injure alcposs drugposs weapposs					/// penalty items
	alcdrink druguse smoke verbteach injteach steal					/// penalty items (cont.)
	peer_* tpeer_*													/// peer averages
	f_*																/// indices
	teach_no teach_white teach_hisp teach_female retent track private	/// further school variables
	HsImp1 HsImp2 HsImp3 HsImp4

keep aid HsId `keepvars'
save "${temp}data_schools.dta", replace


